# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import io

from ComicScra.items import CsdnItem


class ComicscraPipeline(object):
    """Default no-op pipeline: forwards every item unchanged."""

    def process_item(self, item, spider):
        # Nothing to do for this project yet; hand the item to the next stage.
        return item
import json
class CsdnPipeline(object):
    """Dump each scraped item to disk as a JSON file named after its URL path.

    For ``http://host/a/b/c`` the item is written to ``EXPORT_DIR/a_b_c``.
    List-valued fields are flattened into a single CRLF-joined string,
    skipping blank entries; all other values are serialized as-is.
    """

    # Destination directory; override on the class/instance to redirect output
    # (was a hard-coded 'f:\\a\\' string-concat path).
    EXPORT_DIR = 'f:\\a'

    def process_item(self, item, spider):
        """Serialize ``item`` to a UTF-8 JSON file and return it unchanged."""
        # Drop the scheme and host: 'http://host/a/b/c' -> ['a', 'b', 'c'].
        path_parts = item['url'].split('/')[3:]
        if not os.path.isdir(self.EXPORT_DIR):
            os.makedirs(self.EXPORT_DIR)
        file_path = os.path.join(self.EXPORT_DIR, '_'.join(path_parts))
        # Join list fields as unicode text; the old per-element .encode('utf-8')
        # mixed bytes into a str join and broke serialization.
        payload = {
            key: '\r\n'.join(part for part in value if part.strip())
            if isinstance(value, list) else value
            for key, value in item.items()
        }
        # Text mode with explicit UTF-8: json with ensure_ascii=False emits
        # unicode, which the old binary-mode ("wb") handle could not accept.
        with io.open(file_path, 'w', encoding='utf-8') as out:
            out.write(json.dumps(payload, ensure_ascii=False))
        return item


import requests
import os
import settings

class ImageDownloadPipeline(object):
    """Download each page image into IMAGES_STORE/<spider>/<book_name>/.

    Items without an ``image_url`` field pass through untouched. A file is
    (re)downloaded only when it is missing or suspiciously small (<1 KiB),
    which re-fetches earlier truncated/failed downloads.
    """

    # Browser-like UA: some image hosts reject requests with the default one.
    USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1'

    def process_item(self, item, spider):
        """Fetch ``item['image_url']`` to disk; always return the item."""
        if 'image_url' not in item:
            return item

        dir_path = os.path.join(settings.IMAGES_STORE, spider.name, item['book_name'])
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

        img_url = item['image_url']
        if img_url.startswith('//'):
            # Protocol-relative URL from the page markup.
            img_url = 'http:' + img_url

        # Strip any query string before taking the extension, and fall back to
        # 'jpg' when the URL has none (the old rsplit[1] raised IndexError).
        bare_url = img_url.split('?', 1)[0]
        ext_name = bare_url.rsplit('.', 1)[1] if '.' in bare_url.rsplit('/', 1)[-1] else 'jpg'
        file_path = os.path.join(dir_path, '%s.%s' % (item['page'], ext_name))

        if os.path.exists(file_path) and os.path.getsize(file_path) >= 1024:
            return item  # already downloaded and plausibly complete

        # Fetch BEFORE opening the output file: the old code truncated the
        # file first, so a failed request left an empty file that later runs
        # only retried thanks to the <1 KiB size check.
        response = requests.get(
            img_url, stream=True, headers={'User-Agent': self.USER_AGENT})
        response.raise_for_status()  # don't save an HTML error page as an image
        with open(file_path, 'wb') as handle:
            for block in response.iter_content(1024):
                handle.write(block)
        return item
